In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns
In [21]:
os.getcwd()
Out[21]:
'/Users/aishwaryamaddimsetty/Downloads'
In [22]:
os.chdir('/Users/aishwaryamaddimsetty/Downloads')
In [23]:
data = pd.read_csv("DSA.csv")
In [24]:
data.head()
Out[24]:
age job marital education default housing loan contact month day_of_week ... pdays previous poutcome emp.var.rate cons.price.idx cons.conf.idx euribor3m nr.employed ModelPrediction y
0 56 housemaid married basic.4y no no no telephone may mon ... 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 0.932750 no
1 57 services married high.school unknown no no telephone may mon ... 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 0.953579 no
2 37 services married high.school no yes no telephone may mon ... 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 0.945724 no
3 40 admin. married basic.6y no no no telephone may mon ... 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 0.933875 no
4 56 services married high.school no no yes telephone may mon ... 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 0.940996 no

5 rows × 22 columns

In [25]:
list(data.columns)
Out[25]:
['age',
 'job',
 'marital',
 'education',
 'default',
 'housing',
 'loan',
 'contact',
 'month',
 'day_of_week',
 'duration',
 'campaign',
 'pdays',
 'previous',
 'poutcome',
 'emp.var.rate',
 'cons.price.idx',
 'cons.conf.idx',
 'euribor3m',
 'nr.employed',
 'ModelPrediction',
 'y']
In [26]:
len(data.columns)
Out[26]:
22
In [27]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41188 entries, 0 to 41187
Data columns (total 22 columns):
age                41188 non-null int64
job                41188 non-null object
marital            41188 non-null object
education          41188 non-null object
default            41188 non-null object
housing            41188 non-null object
loan               41188 non-null object
contact            41188 non-null object
month              41188 non-null object
day_of_week        41188 non-null object
duration           41188 non-null int64
campaign           41188 non-null int64
pdays              41188 non-null int64
previous           41188 non-null int64
poutcome           41188 non-null object
emp.var.rate       41188 non-null float64
cons.price.idx     41188 non-null float64
cons.conf.idx      41188 non-null float64
euribor3m          41188 non-null float64
nr.employed        41188 non-null float64
ModelPrediction    41188 non-null float64
y                  41188 non-null object
dtypes: float64(6), int64(5), object(11)
memory usage: 6.9+ MB
In [28]:
data.isnull().sum()
Out[28]:
age                0
job                0
marital            0
education          0
default            0
housing            0
loan               0
contact            0
month              0
day_of_week        0
duration           0
campaign           0
pdays              0
previous           0
poutcome           0
emp.var.rate       0
cons.price.idx     0
cons.conf.idx      0
euribor3m          0
nr.employed        0
ModelPrediction    0
y                  0
dtype: int64
In [29]:
data_dup = data[data.duplicated(keep= "last")]
In [30]:
data_dup.shape
Out[30]:
(12, 22)
In [31]:
data.shape
Out[31]:
(41188, 22)
In [32]:
data_n = data.drop_duplicates()
In [33]:
data_n.shape
Out[33]:
(41176, 22)
In [34]:
41188-41176
Out[34]:
12
In [35]:
data_x = data.iloc[:, :-1]
In [36]:
data_x.shape
Out[36]:
(41188, 21)
In [37]:
data_y = data['y']
In [38]:
data_y
Out[38]:
0         no
1         no
2         no
3         no
4         no
5         no
6         no
7         no
8         no
9         no
10        no
11        no
12        no
13        no
14        no
15        no
16        no
17        no
18        no
19        no
20        no
21        no
22        no
23        no
24        no
25        no
26        no
27        no
28        no
29        no
        ... 
41158    yes
41159    yes
41160    yes
41161     no
41162     no
41163    yes
41164    yes
41165    yes
41166    yes
41167     no
41168     no
41169     no
41170     no
41171    yes
41172    yes
41173    yes
41174    yes
41175     no
41176     no
41177     no
41178    yes
41179     no
41180     no
41181    yes
41182     no
41183    yes
41184     no
41185     no
41186    yes
41187     no
Name: y, Length: 41188, dtype: object
In [39]:
import plotly.graph_objects as pl
In [40]:
import plotly.express as px

df = px.data.tips()

fig = px.pie(df, values='tip', names='day', color='day', color_discrete_map={'Thur':'lightcyan', 'Fri':'cyan', 'Sat':'royalblue', 'Sun':'darkblue'})

In [42]:
target_count = data['y'].value_counts()
target_count
Out[42]:
no     36548
yes     4640
Name: y, dtype: int64
In [43]:
colors = ['Red', 'Green']
trace = pl.Pie(labels =target_count.index, values = target_count.values, pull= [0.05], marker=dict(colors=colors)) 

layout = pl.Layout(title = "Subscribed to the Term Deposit", height = 200, legend= dict(x=1.1, y=1.3))



fig = pl.Figure(data=[trace], layout = layout)

fig.update_layout(height=500, width=700)
fig.show()

we can say the data is imbalanced

In [ ]:
#lien in the graph 

colors = ['Red', 'Green']
trace = pl.Pie(labels =target_count.index, values = target_count.values, pull= [0.05], marker=dict(colors=colors, line=dict(color='#000000', width=2))) 

layout = pl.Layout(title = "Subscribed to the Term Deposit", height = 200, legend= dict(x=1.1, y=1.3))



fig = pl.Figure(data=[trace], layout = layout)

fig.update_layout(height=500, width=700)
fig.show()
In [32]:
data.columns
Out[32]:
Index(['age', 'job', 'marital', 'education', 'default', 'housing', 'loan',
       'contact', 'month', 'day_of_week', 'duration', 'campaign', 'pdays',
       'previous', 'poutcome', 'emp.var.rate', 'cons.price.idx',
       'cons.conf.idx', 'euribor3m', 'nr.employed', 'ModelPrediction', 'y'],
      dtype='object')
In [41]:
#Building graphs to find the co relation between every two columns - for every column with the target variable ( realtion of what had closest relation to find the reault - if subscribed or not)

sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.barplot(x=data['y'], y = data['age'])
Out[41]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe1ab01add8>
In [46]:
Deposit = ['marital']

fig = pl.Figure(data=[
    pl.Bar(name = 'Yes', x = Deposit, y = [20, 14, 23]),
    pl.Bar(name='No', x=Deposit, y=[12,18,29])
])


# Change the bar mode

fig.update_layout(barmode='stack')
fig.show()
In [52]:
## shows visual reprensentation of teh data's attributes and it's values
marital=['married', 'single', 'divorced','unknown']

fig = pl.Figure([pl.Bar(x=marital, y=[24928, 11568, 4612,80])])
fig.show()
In [49]:
data['marital'].nunique()
Out[49]:
4
In [50]:
data['marital'].value_counts()
Out[50]:
married     24928
single      11568
divorced     4612
unknown        80
Name: marital, dtype: int64
In [ ]:
data_age = pd.data['age']
bins = [0]
In [75]:
data['age_groups'] = pd.cut(x=data['age'], bins=[19,20,29,39,49,59,69,79,89,99], labels=['teens','20s', '30s', '40s','50s', '60s','70s', '80s','90s'])

data['age_groups']
Out[75]:
0        50s
1        50s
2        30s
3        40s
4        50s
5        40s
6        50s
7        40s
8        20s
9        20s
10       40s
11       20s
12       20s
13       50s
14       30s
15       50s
16       30s
17       40s
18       50s
19       30s
20       30s
21       50s
22       50s
23       40s
24       30s
25       30s
26       50s
27       30s
28       50s
29       50s
        ... 
41158    30s
41159    30s
41160    30s
41161    30s
41162    60s
41163    30s
41164    50s
41165    30s
41166    30s
41167    30s
41168    30s
41169    60s
41170    40s
41171    30s
41172    30s
41173    60s
41174    60s
41175    30s
41176    30s
41177    50s
41178    60s
41179    60s
41180    30s
41181    30s
41182    20s
41183    70s
41184    40s
41185    50s
41186    40s
41187    70s
Name: age_groups, Length: 41188, dtype: category
Categories (9, object): [teens < 20s < 30s < 40s ... 60s < 70s < 80s < 90s]
In [72]:
data['age'].min()
data['age'].max()
Out[72]:
98
In [76]:
## shows visual reprensentation of teh data's attributes and it's values
deposit=['Yes', 'No']

fig = pl.Figure([pl.Bar(x=deposit, y=data['age_groups'])
                 
                 fig.show()
  File "<ipython-input-76-4e497719e7f5>", line 6
    fig.show()
      ^
SyntaxError: invalid syntax
In [79]:
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.barplot(x=data['y'], y=data['age'])
Out[79]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe1a969ae48>
In [83]:
#plotting histogram for numerical values:

numerical_columns = data.select_dtypes(include=['int'])
numerical_columns.hist(figsize=(10,12))
Out[83]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fe1ab8e0588>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe1ab7d52e8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7fe1ab33db38>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe1ab3d34a8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7fe1acd10e10>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe1a9ad07b8>]],
      dtype=object)

important step

In [86]:
#Small code to get counts of the data's catogorical values:

category_column = [i for i in data.columns if data[i].dtypes == 'object']

for column in category_column:
    print(column, '\n\n')
    print(data[column].value_counts())
    print("------------" *4)
job 


admin.           10422
blue-collar       9254
technician        6743
services          3969
management        2924
retired           1720
entrepreneur      1456
self-employed     1421
housemaid         1060
unemployed        1014
student            875
unknown            330
Name: job, dtype: int64
------------------------------------------------
marital 


married     24928
single      11568
divorced     4612
unknown        80
Name: marital, dtype: int64
------------------------------------------------
education 


university.degree      12168
high.school             9515
basic.9y                6045
professional.course     5243
basic.4y                4176
basic.6y                2292
unknown                 1731
illiterate                18
Name: education, dtype: int64
------------------------------------------------
default 


no         32588
unknown     8597
yes            3
Name: default, dtype: int64
------------------------------------------------
housing 


yes        21576
no         18622
unknown      990
Name: housing, dtype: int64
------------------------------------------------
loan 


no         33950
yes         6248
unknown      990
Name: loan, dtype: int64
------------------------------------------------
contact 


cellular     26144
telephone    15044
Name: contact, dtype: int64
------------------------------------------------
month 


may    13769
jul     7174
aug     6178
jun     5318
nov     4101
apr     2632
oct      718
sep      570
mar      546
dec      182
Name: month, dtype: int64
------------------------------------------------
day_of_week 


thu    8623
mon    8514
wed    8134
tue    8090
fri    7827
Name: day_of_week, dtype: int64
------------------------------------------------
poutcome 


nonexistent    35563
failure         4252
success         1373
Name: poutcome, dtype: int64
------------------------------------------------
y 


no     36548
yes     4640
Name: y, dtype: int64
------------------------------------------------

Need to edit this graph

In [93]:
#Checking the data's target variable to check the data distribution in the column y:

Not_Subscribed = len(data[data['y'] == 'no'])
Subscribed = len(data[data['y']== 'yes'])

percentage_NS = (Not_Subscribed/len(data['y']))*100
percentage_Sub = (Subscribed/len(data['y']))*100

print('% of People Subscribed:', percentage_Sub)
print('% of People who dint subscribed:', percentage_NS)

data['y'].value_counts().plot.bar()
% of People Subscribed: 11.265417111780131
% of People who dint subscribed: 88.73458288821988
Out[93]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe1abb71278>
In [91]:
Not_Subscribed = len(data[data['y'] == 'no'])
Subscribed = len(data[data['y']== 'yes'])
In [92]:
Not_Subscribed
Out[92]:
36548
In [94]:
# Viz categorical data 

for column in category_column:
    pd.crosstab(data[column], data.y).plot(kind = 'bar')
    plt.title(column)
In [98]:
sns.distplot(data['age'])
Out[98]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe1ac409b38>
In [11]:
# Data Visualization : Individual columns grAPHS - undersatdning number of people in the categories

sns.set(style = 'ticks', color_codes= True)
sns.countplot(y='job', data = data)
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdcbe2517f0>
In [15]:
data = data[data.job != 'unknown']
In [16]:
sns.countplot(y = 'marital', data = data)
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdcbe030358>
In [17]:
data.marital.value_counts()
Out[17]:
married     24694
single      11494
divorced     4599
unknown        71
Name: marital, dtype: int64
In [18]:
data = data[data.marital != 'Unknown']
data = data[data.loan != 'unknown']
In [19]:
sns.countplot(y='education', data = data)
Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdcbe952080>
In [45]:
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.barplot(x=data['y'], y=data['age'])
Out[45]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdcbe97e160>
In [46]:
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.barplot(x=data['y'], y=data['campaign'])
Out[46]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdcbe97e128>
In [47]:
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['job'])
Out[47]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca48ef1d0>
In [49]:
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['marital'])
Out[49]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca48ef198>
In [50]:
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['education'])
Out[50]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca4c8f080>
In [51]:
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['housing'])
Out[51]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca47a92e8>
In [52]:
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['loan'])
Out[52]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca4cbd320>
In [53]:
# using heatmap to understand the ocrelatiob between numerical values 

plt.figure(figsize=(14,7))
cor = data.corr()
sns.heatmap(cor, annot = True)
plt.show()
In [55]:
# Was the campaign succesfull ?
(data['y'] =='yes').sum()
Out[55]:
4640
In [59]:
# Was the campaign succesfull ?
(data['y'] =='yes').value_counts()
Out[59]:
False    36548
True      4640
Name: y, dtype: int64

Visualizing the data accordingly - bar charts to understand the column distribution and countplot to undestand the numbe of people in each column who have subscribed or not

Github : https://github.com/sukanta-27/Predicting-Success-of-Bank-Telemarketing/blob/master/Jupyter%20Notebook/Bank_Marketing_CaseStudy.ipynb

In [78]:
# JOB :
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['job'])
Out[78]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca76547f0>
In [79]:
# Easy count plots:
sns.countplot(y='job', hue= 'y', data = data)
plt.show()
In [80]:
# Easy count plots:
sns.countplot(x='job', hue= 'y', data = data)
plt.show()
In [81]:
# Marital
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['marital'])
Out[81]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca7858358>
In [82]:
# Easy count plots:
sns.countplot(x='marital', hue= 'y', data = data)
plt.show()
In [85]:
# Marital
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['default'])
Out[85]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca4e405f8>
In [83]:
# Easy count plots:
sns.countplot(x='default', hue= 'y', data = data)
plt.show()
In [86]:
# Marital
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['education'])
Out[86]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca762a668>
In [87]:
# Easy count plots:
sns.countplot(x='education', hue= 'y', data = data)
plt.show()
In [88]:
# Marital
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['housing'])
Out[88]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca77eb400>
In [89]:
# Easy count plots:
sns.countplot(x='housing', hue= 'y', data = data)
plt.show()
In [91]:
# Marital
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['loan'])
Out[91]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca741c2e8>
In [90]:
# Easy count plots:
sns.countplot(x='loan', hue= 'y', data = data)
plt.show()
In [92]:
# Marital
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['contact'])
Out[92]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca7a13da0>
In [93]:
# Easy count plots:
sns.countplot(x='contact', hue= 'y', data = data)
plt.show()
In [95]:
# Marital
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['month'])
Out[95]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca74728d0>
In [94]:
# Easy count plots:
sns.countplot(x='month', hue= 'y', data = data)
plt.show()
In [96]:
# Marital
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['day_of_week'])
Out[96]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca70995c0>
In [97]:
# Easy count plots:
sns.countplot(x='day_of_week', hue= 'y', data = data)
plt.show()
In [98]:
# Marital
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['poutcome'])
Out[98]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca4efa0f0>
In [99]:
# Easy count plots:
sns.countplot(x='poutcome', hue= 'y', data = data)
plt.show()
In [100]:
%matplotlib inline
In [101]:
sns.boxplot(data = data, x = 'y', y = 'age')
plt.show()

From the above boxplot we know that for both the customers that subscibed or didn't subscribe a term deposit, has a median age of around 38-40. And the boxplot for both the classes overlap quite a lot, which means that age isn't necessarily a good indicator for which customer will subscribe and which customer will not.

In [102]:
plt.figure(figsize=(10,8))
sns.distplot(data["age"])
Out[102]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdca8aa4780>

As we can see in the above distribution also, that most of the customers are in the age range of 30-40.

In [103]:
%matplotlib inline
sns.boxplot(data=data, x="y", y="duration")
plt.show()

From the above plot it is clear that, the duration (last contact duration) of a customer can be useful for predicting the target variable. It is expected because it is already mentioned in the data overview that this field highely affects the target variable and should only be used for benchmark purposes.

In [104]:
plt.figure(figsize=(10,8))
sns.distplot(data["duration"])
plt.show()

This seems like a powerlaw distribution where most the values are very low and very few have high values.

In [107]:
%matplotlib inline
sns.boxplot(data=data, x="y", y="campaign")
plt.show()
In [108]:
%matplotlib inline
plt.figure(figsize=(10,8))
sns.distplot(data["campaign"])
plt.show()
In [109]:
data['pdays'].unique()
Out[109]:
array([999,   6,   4,   3,   5,   1,   0,  10,   7,   8,   9,  11,   2,
        12,  13,  14,  15,  16,  21,  17,  18,  22,  25,  26,  19,  27,
        20])
In [110]:
data['pdays'].value_counts()
Out[110]:
999    39673
3        439
6        412
4        118
9         64
2         61
7         60
12        58
10        52
5         46
13        36
11        28
1         26
15        24
14        20
8         18
0         15
16        11
17         8
18         7
19         3
22         3
21         2
26         1
20         1
25         1
27         1
Name: pdays, dtype: int64

Most of the values are 999, which means that the most of the customers have never been contacted before

In [112]:
%matplotlib inline
sns.boxplot(data=data, x="y", y="pdays")
plt.show()
In [113]:
%matplotlib inline
plt.figure(figsize=(10,8))
sns.distplot(data[data["y"]=="yes"]["pdays"])
sns.distplot(data[data["y"]=="no"]["pdays"])
plt.show()
In [114]:
data["previous"].unique()
Out[114]:
array([0, 1, 2, 3, 4, 5, 6, 7])
In [115]:
data["previous"].value_counts()
Out[115]:
0    35563
1     4561
2      754
3      216
4       70
5       18
6        5
7        1
Name: previous, dtype: int64
In [116]:
data[data["y"]=="yes"]["previous"].value_counts()
Out[116]:
0    3141
1     967
2     350
3     128
4      38
5      13
6       3
Name: previous, dtype: int64
In [117]:
data[data["y"]=="no"]["previous"].value_counts()
Out[117]:
0    32422
1     3594
2      404
3       88
4       32
5        5
6        2
7        1
Name: previous, dtype: int64
In [118]:
%matplotlib inline
sns.boxplot(data=data, x="y", y="previous")
plt.show()
In [119]:
%matplotlib inline
plt.figure(figsize=(10,8))
sns.distplot(data["previous"])
plt.show()
In [120]:
%matplotlib inline
plt.figure(figsize=(10,8))
sns.distplot(data[data["y"]=="yes"]["previous"])
sns.distplot(data[data["y"]=="no"]["previous"])
plt.show()

The previous feature is very similarly distributed for both the classes in the target variable. From basic EDA it is not sure how much value this individual feature have on the target variable.

In [121]:
countplot('previous', data)
In [123]:
# Marital
sns.set_style('whitegrid')
plt.figure(figsize=(14,7))
sns.countplot(data['previous'])
Out[123]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdcaa67b0b8>
In [124]:
# Easy count plots:
sns.countplot(x='previous', hue= 'y', data = data)
plt.show()
In [125]:
data["emp.var.rate"].value_counts()
Out[125]:
 1.4    16234
-1.8     9184
 1.1     7763
-0.1     3683
-2.9     1663
-3.4     1071
-1.7      773
-1.1      635
-3.0      172
-0.2       10
Name: emp.var.rate, dtype: int64
In [126]:
%matplotlib inline
sns.boxplot(data=data, x="y", y="emp.var.rate")
plt.show()
In [127]:
%matplotlib inline
plt.figure(figsize=(10,8))
sns.distplot(data["emp.var.rate"])
plt.show()
In [128]:
%matplotlib inline
sns.boxplot(data=data, x="y", y="cons.price.idx")
plt.show()
In [129]:
%matplotlib inline
plt.figure(figsize=(10,8))
sns.distplot(data["cons.price.idx"])
plt.show()
In [130]:
%matplotlib inline
sns.boxplot(data=data, x="y", y="cons.conf.idx")
plt.show()
In [131]:
%matplotlib inline
plt.figure(figsize=(10,8))
sns.distplot(data["cons.conf.idx"])
plt.show()
In [132]:
%matplotlib inline
sns.boxplot(data=data, x="y", y="euribor3m")
plt.show()
In [133]:
%matplotlib inline
plt.figure(figsize=(10,8))
sns.distplot(data["euribor3m"])
plt.show()
In [134]:
%matplotlib inline
sns.boxplot(data=data, x="y", y="nr.employed")
plt.show()
In [135]:
%matplotlib inline
plt.figure(figsize=(10,8))
sns.distplot(data["nr.employed"])
plt.show()
In [ ]:
https://rstudio-pubs-static.s3.amazonaws.com/581759_628a43982b744862b56256fde2d14916.html#(22)